# clear workspace
rm(list=ls())

# set the working directory
setwd (#insert wd link here)

# open necessary libraries
library(foreign)
library(psych)
library(stargazer)
library(Matching)
library(ebal)
library(Zelig)
library(survey)


# read in the data set
anes12 <- read.dta("anes_timeseries_2012_stata12.dta",
                   convert.factors=F)


##########################
###### DATA CLEANING #####
##########################

##### functions used for data cleaning ####
### recode from 0 to 1 ###
### this function recodes variables on a 0-1 scale as is done in 
### Krupnikov and Piston (2014)
recoder <- function(old.var){
  min.old <- min(old.var, na.rm=T)
  max.old <- max(old.var, na.rm=T)
  new.var <- (old.var - min.old) / (max.old - min.old)
  return(new.var)
}


##### dependent variable #####
### turnout variable
anes12$turnout <- anes12$rvote2012_x
anes12$turnout[anes12$turnout == -6] <- NA
anes12$turnout[anes12$turnout == -9] <- NA
anes12$turnout[anes12$turnout == -2] <- NA
anes12$turnout[anes12$turnout == 2] <- 0  
prop.table(table(anes12$turnout))


##### explanatory variables #####
### party id

# 2012
anes12$party.id <- anes12$pid_x
# recoding:
anes12$party.id <- anes12$party.id-1
anes12$party.id[anes12$party.id==-3] <- NA
# create a party id standardized from 0 to 1
anes12$party.id.0.1 <- recoder(anes12$party.id)
# validate
table(anes12$party.id)
table(anes12$party.id.0.1)


### party id dummy variables

anes12$strong.dem[anes12$party.id==NA] <- NA
anes12$weak.dem[anes12$party.id==NA] <- NA
anes12$lean.dem[anes12$party.id==NA] <- NA
anes12$ind[anes12$party.id==NA] <- NA
anes12$lean.rep[anes12$party.id==NA] <- NA
anes12$weak.rep[anes12$party.id==NA] <- NA
anes12$strong.rep[anes12$party.id==NA] <- NA
# strong dem
anes12$strong.dem[!is.na(anes12$party.id)] <- 0
anes12$strong.dem[anes12$party.id==0] <- 1
# weak dem
anes12$weak.dem[!is.na(anes12$party.id)] <- 0
anes12$weak.dem[anes12$party.id==1] <- 1
# lean dem
anes12$lean.dem[!is.na(anes12$party.id)] <- 0
anes12$lean.dem[anes12$party.id==2] <- 1
# independent
anes12$ind[!is.na(anes12$party.id)] <- 0
anes12$ind[anes12$party.id==3] <- 1
# lean republican
anes12$lean.rep[!is.na(anes12$party.id)] <- 0
anes12$lean.rep[anes12$party.id==4] <- 1
# weak republican
anes12$weak.rep[!is.na(anes12$party.id)] <- 0
anes12$weak.rep[anes12$party.id==5] <- 1
# strong republican
anes12$strong.rep[!is.na(anes12$party.id)] <- 0
anes12$strong.rep[anes12$party.id==6] <- 1

## Prejudice 
prej.varlist12 <- colnames(with(anes12,cbind(stype_hwkwhite,stype_hwkblack,stype_intwhite,stype_intblack)))

anes12$stype_hwkwhite[anes12$stype_hwkwhite < 1]  <- NA
anes12$stype_hwkblack[anes12$stype_hwkblack < 1]  <- NA
anes12$stype_intwhite[anes12$stype_intwhite < 1]  <- NA
anes12$stype_intblack[anes12$stype_intblack < 1]  <- NA


# create the prejudice scale
lazy.scale12 <- with(anes12,(stype_hwkblack - stype_hwkwhite + 6)/12)
describe(lazy.scale12)
intelligent.scale12 <- with(anes12,(stype_intblack - stype_intwhite + 6)/12)
describe(intelligent.scale12)
anes12$prejudice <- (lazy.scale12 + intelligent.scale12) / 2
describe(anes12$prejudice)


### age

# in years
anes12$age <- anes12$dem_age_r_x
anes12$age[anes12$age < 18] <- NA
describe(anes12$age)
# recoded 0-1
anes12$age.0.1 <- recoder(anes12$age)
describe(anes12$age.0.1)



### education

#dem_edu categories -> grade equivalents
anes12$educ <- anes12$dem_edu
anes12$educ[anes12$educ < 0] <- NA
anes12$educ[anes12$educ == 95] <- NA
anes12$educ[anes12$educ == 16] <- 17
anes12$educ[anes12$educ == 15] <- 17
anes12$educ[anes12$educ == 14] <- 17
anes12$educ[anes12$educ == 13] <- 16
anes12$educ[anes12$educ == 12] <- 14
anes12$educ[anes12$educ == 11] <- 14
anes12$educ[anes12$educ == 10] <- 14
anes12$educ[anes12$educ == 9] <- 12
anes12$educ[anes12$educ == 8] <- 12
anes12$educ[anes12$educ == 7] <- 11
anes12$educ[anes12$educ == 6] <- 10
anes12$educ[anes12$educ == 5] <- 9
anes12$educ[anes12$educ == 4] <- 8
anes12$educ[anes12$educ == 3] <- 6
anes12$educ[anes12$educ == 2] <- 3
anes12$educ[anes12$educ == 1] <- 0

describe(anes12$educ)
# recoded 0-1
anes12$educ.0.1 <- recoder(anes12$educ)
describe(anes12$educ.0.1)

### gender: female coded =1
anes12$female <- anes12$gender_respondent_x - 1
table(anes12$female)


### income
anes12$income <- anes12$incgroup_prepost_x  
anes12$income[anes12$income < 0] <- NA
anes12$income.0.1 <- recoder(anes12$income)
describe(anes12$income.0.1)


### currently married
anes12$married <- NA
anes12$married[anes12$dem_marital %in% 2:6] <- 0
anes12$married[anes12$dem_marital==1] <- 1
table(anes12$married)


### south
states.in.south <- c("FL","SC","AL","MS","GA","LA","TX","VA","AR","TN","NC")
anes12$south <- 0
anes12$south[anes12$sample_state %in% states.in.south] <- 1
table(anes12$south)


### interest
anes12$interest <- NA
anes12$interest[anes12$interest_attention==1] <- 1
anes12$interest[anes12$interest_attention==2] <- 0.67
anes12$interest[anes12$interest_attention==3] <- 0.33
anes12$interest[anes12$interest_attention==4] <- 0
anes12$interest[anes12$interest_attention==5] <- 0
table(anes12$interest)


### contacted by party
anes12$contact <- NA
anes12$contact[anes12$mobilpo_party==1] <- 1
anes12$contact[anes12$mobilpo_party==2] <- 0
table(anes12$contact)


### length of resident in R's community
anes12$residence <- anes12$dem3_yearscomm  # TR: different cutoff points, max=40 for 2012
anes12$residence[anes12$dem3_yearscomm <0] <- NA
table(anes12$residence)
anes12$residence.0.1 <- recoder(anes12$residence)
describe(anes12$residence.0.1)



### external efficacy

# efficpo_carestd: public officials don't care about what people think
anes12$officials.care <- NA
# agree strongly coded =0
anes12$officials.care[anes12$efficpo_carestd==1] <- 0
# agree somewhat coded =.25
anes12$officials.care[anes12$efficpo_carestd==2] <- .25
# neither agree nor disagree coded =.50
anes12$officials.care[anes12$efficpo_carestd==3] <- .50
# disagree somewhat coded =.75
anes12$officials.care[anes12$efficpo_carestd==4] <- .75
# disagree strongly coded =1
anes12$officials.care[anes12$efficpo_carestd==5] <- 1

# efficpo_carerev: officials care what people like you think
# a great deal coded =1
anes12$officials.care[anes12$efficpo_carerev==1] <- 1
# a lot coded =.75
anes12$officials.care[anes12$efficpo_carerev==2] <- .75
# a moderate amount coded =.50
anes12$officials.care[anes12$efficpo_carerev==3] <- .50
# a little coded =.25
anes12$officials.care[anes12$efficpo_carerev==4] <- .25
# not at all coded =0
anes12$officials.care[anes12$efficpo_carerev==5] <- 0
# validate
table(anes12$officials.care)


## people have a (no) say in what government does
# efficpo_saystd: People don't have any say about what government does.
anes12$say.in.gov <- NA
# agree strongly coded =0
anes12$say.in.gov[anes12$efficpo_saystd==1] <- 0
# agree somewhat coded =.25
anes12$say.in.gov[anes12$efficpo_saystd==2] <- .25
# neither agree nor disagree coded =.50
anes12$say.in.gov[anes12$efficpo_saystd==3] <- .50
# disagree somewhat coded =.75
anes12$say.in.gov[anes12$efficpo_saystd==4] <- .75
# disagree strongly coded =1
anes12$say.in.gov[anes12$efficpo_saystd==5] <- 1

# efficpo_sayrev: How much can people affect what government does? 
# a great deal coded =1
anes12$say.in.gov[anes12$efficpo_sayrev==1] <- 1
# a lot coded =.75
anes12$say.in.gov[anes12$efficpo_sayrev==2] <- .75
# a moderate amount coded =.50
anes12$say.in.gov[anes12$efficpo_sayrev==3] <- .50
# a little coded =.25
anes12$say.in.gov[anes12$efficpo_sayrev==4] <- .25
# not at all coded =0
anes12$say.in.gov[anes12$efficpo_sayrev==5] <- 0
# validate
table(anes12$say.in.gov)
## combine officials.care and say.in.gov for external efficacy index
anes12$external.efficacy <- (anes12$officials.care + anes12$say.in.gov) / 2
table(anes12$external.efficacy)



### does R think presidential race will be close
anes12$close.race <- NA
anes12$close.race[anes12$preswin_close==1] <- 1
anes12$close.race[anes12$preswin_close==2] <- 0
table(anes12$close.race)


### affect for candidate
anes12$obama.therm <- anes12$ft_dpc
anes12$obama.therm[anes12$obama.therm < 0] <- NA
anes12$romney.therm <- anes12$ft_rpc   
anes12$romney.therm[anes12$romney.therm < 0] <- NA
anes12$affect <- anes12$obama.therm - anes12$romney.therm


##### VARIABLES FOR SUBSETTING AND WEIGHTS #####
### race

# non-hispanic white
anes12$nh.white <- NA
anes12$nh.white[anes12$dem_raceeth_x>0] <- 0
anes12$nh.white[anes12$dem_raceeth_x==1] <- 1
table(anes12$nh.white)
# white
anes12$white <- NA
anes12$white[anes12$dem_raceeth_x>0] <- 0
anes12$white[anes12$dem_raceeth_x==1] <- 1
anes12$white[anes12$dem_raceeth_x==3] <- 1
table(anes12$white)


### survey weights
anes12$weight <- anes12$weight_full

### psu
anes12$psu <- anes12$psu_full

# Logit function for replication of coefficients
logit.Krup.Pist <- function(data.matrix){
  glm(data.matrix[,1] ~ data.matrix[,2:(NCOL(data.matrix)-2)], 
      family = "binomial"(link = "logit"),
      weight=data.matrix[,NCOL(data.matrix)-1])
}

# weave standard errors to get same as stata
weave.se <- function(model){
  sqrt(diag(weave(model)))
}

# covariate labels for data presentation 
cov.labels <- c("Prejudice","Strong Dem",
                "Strong Dem x Prej","Age","Education","Female",
                "Income","Married","Years in residence","South",
                "Efficacy","Interest","Contacted by party",
                "Perceive election to be close")


####################
##### ANALYSIS #####
####### 2012 #######
####################


### MODELS 1-6 in TABLE 1 ###
# variables for models 1,2,3,4
model.1234.X.12 <- as.matrix(with(anes12,cbind(
  turnout,prejudice,age.0.1,educ.0.1,female,income.0.1,married,residence.0.1,
  south,external.efficacy,interest,contact,close.race,weight,psu)))
colnames(model.1234.X.12) <- c("Turnout","Prejudice","Age","Education","Female",
                               "Income","Married","Years in residence","South",
                               "Efficacy","Interest","Contacted by party",
                               "Perceive election to be close","weight","psu")
# variables for model 5
model.5.X.12 <- as.matrix(with(anes12,cbind(
  turnout,prejudice,strong.dem,prejudice*strong.dem,age.0.1,educ.0.1,female,
  income.0.1,married,residence.0.1,south,external.efficacy,interest,
  contact,close.race,weight,psu)))
colnames(model.5.X.12) <- c("Turnout","Prejudice","Strong Dem",
                            "Strong Dem x Prej","Age","Education","Female",
                            "Income","Married","Years in residence","South",
                            "Efficacy","Interest","Contacted by party",
                            "Perceive election to be close","weight","psu")
# variables for model 6
model.6.X.12 <- as.matrix(with(anes12,cbind(
  turnout,prejudice,strong.dem,prejudice*strong.dem,weight,psu)))
colnames(model.6.X.12) <- c("Turnout","Prejudice","Strong Dem",
                            "Strong Dem x Prej","weight","psu")

# model 1: subset to strong dems (and whites)
model.1.X.12 <- na.omit(subset(model.1234.X.12,anes12$strong.dem==1 & anes12$nh.white==1))
# model 2: subset to weak/lean dems (and whites)
model.2.X.12 <- na.omit(subset(model.1234.X.12,(anes12$weak.dem==1 | anes12$lean.dem==1) &
                                 anes12$nh.white==1))
# model 3: subset to independents (and whites)
model.3.X.12 <- na.omit(subset(model.1234.X.12,anes12$ind==1 & anes12$nh.white==1))
# model 4: subset to republicans (and whites)
model.4.X.12 <- na.omit(subset(model.1234.X.12,(anes12$strong.rep==1 | anes12$weak.rep==1 |
                                                  anes12$lean.rep==1) & anes12$nh.white==1))
# model 5: all whites
model.5.X.12 <- na.omit(subset(model.5.X.12,anes12$nh.white==1))
# model 6: all whites
model.6.X.12 <- na.omit(subset(model.6.X.12,anes12$nh.white==1))

# run the function for each model
results12.mod1 <- logit.Krup.Pist(model.1.X.12)
results12.mod2 <- logit.Krup.Pist(model.2.X.12)
results12.mod3 <- logit.Krup.Pist(model.3.X.12)
results12.mod4 <- logit.Krup.Pist(model.4.X.12)
results12.mod5 <- logit.Krup.Pist(model.5.X.12)
results12.mod6 <- logit.Krup.Pist(model.6.X.12)

results12.list <- list(results12.mod1,results12.mod2,results12.mod3,results12.mod4,results12.mod5,results12.mod6)
se12.list <- lapply(results12.list, weave.se)

stargazer(results12.list,se=se12.list,
          title=c("Prejudice and White turnout in the 2012 election, by partisan group (based on preliminary ANES release)"),
          covariate.labels=cov.labels,dep.var.labels=c("Turnout"),column.sep.width="2pt")
